package main

import (
	"database/sql"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"time"

	_ "github.com/go-sql-driver/mysql" // 使用 _ 匿名加载 MySQL 驱动包
	"github.com/lisgroup/easyhttp"
)

// Params is a string-keyed map of string values. In this file it is used as
// the type of the header set passed to Request1.
type Params map[string]string

// SpiderDouBan scrapes one page of the Douban movie Top 250 list and stores
// the extracted movies through insert.
//
// index is the 1-based page number; each page holds 25 entries, so the request
// uses start=(index-1)*25. If at least one channel is supplied, index is sent
// on ch[0] when the function returns -- on success and on failure alike -- so a
// caller that waits for one message per page never blocks forever.
//
// Failures are reported on standard output; nothing is returned.
func SpiderDouBan(index int, ch ...chan int) {
	// Signal completion from a defer so that the early returns below still
	// notify the waiting receiver.
	defer func() {
		if len(ch) > 0 {
			ch[0] <- index
		}
	}()

	urls := "https://movie.douban.com/top250?start=" + strconv.Itoa((index-1)*25) + "&filter="
	// A browser-like user agent is sent with the request.
	headers := map[string]interface{}{"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"}

	options := easyhttp.Options{
		Headers: headers,
	}
	result, err := easyhttp.NewClient().Request(urls, "GET", options)
	if err != nil {
		fmt.Println("HttpGet err: ", err)
		// result must not be used after a failed request.
		return
	}

	// One match per movie item. Capture groups, in order: rank, poster alt
	// text, poster URL, title, other titles, the two halves of the
	// description paragraph (split at <br>), and the rating.
	regExp := `<div class="item">[\s\S]*?<div class="pic">[\s\S]*?<em class="">(.*?)<\/em>[\s\S]*?<a href=".*?">[\s\S]*?<img width=".*?" alt="(.*?)" src="(.*?)" class=".*?">[\s\S]*?div class="info[\s\S]*?class="hd"[\s\S]*?class="title">(.*?)<\/span>[\s\S]*?class="other">(.*?)<\/span>[\s\S]*?<div class="bd">[\s\S]*?<p class=".*?">([\s\S]*?)<br>([\s\S]*?)<\/p>[\s\S]*?span class="rating_num".*?average">(.*?)<\/span>`
	content := findByRegexp(regExp, result.Content)
	if len(content) == 0 {
		// Nothing matched (for example the page layout changed or the
		// request was blocked); there is nothing to store.
		fmt.Println("no movies matched on page", index)
		return
	}

	// Persist the scraped rows.
	insert(content)
}

// findByRegexp returns every match of the pattern regExp in result.
//
// Each element of the returned slice is one match: index 0 holds the full
// match and the following indexes hold the capture groups. The return value
// is nil when nothing matches. The pattern is compiled with
// regexp.MustCompile, so an invalid pattern panics.
func findByRegexp(regExp, result string) [][]string {
	return regexp.MustCompile(regExp).FindAllStringSubmatch(result, -1)
}

/**
CREATE TABLE `top250` (
  `id` int(20) NOT NULL AUTO_INCREMENT,
  `title` varchar(20) DEFAULT '',
  `image` varchar(100) DEFAULT '',
  `subtitle` varchar(255) DEFAULT '',
  `other` varchar(255) DEFAULT NULL,
  `personnel` varchar(255) DEFAULT '',
  `info` varchar(255) DEFAULT '',
  `score` varchar(10) DEFAULT '',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4;
*/

// insert writes the given movie rows into the `top250` table with a single
// multi-row INSERT statement.
//
// Each element of movies is expected to have nine fields: index 0 is ignored
// and indexes 1..8 are bound, in order, to the columns id, title, image,
// subtitle, other, personnel, info and score. Rows with a different number of
// fields are logged and skipped. When no usable row remains the function
// returns without opening a database connection, because an INSERT without
// any value tuple is not valid SQL.
//
// Database errors terminate the program through log.Fatal.
func insert(movies [][]string) {
	const (
		columns     = 8
		placeholder = "(?, ?, ?, ?, ?, ?, ?, ?)"
	)

	// valueStrings collects one placeholder tuple per row, valueArgs the
	// matching bound values.
	valueStrings := make([]string, 0, len(movies))
	valueArgs := make([]interface{}, 0, len(movies)*columns)
	for _, val := range movies {
		if len(val) != columns+1 {
			// A row of another width would leave the placeholder and
			// argument counts out of sync and fail the whole batch.
			log.Printf("insert: skipping row with %d fields, want %d", len(val), columns+1)
			continue
		}
		valueStrings = append(valueStrings, placeholder)
		for _, field := range val[1:] {
			valueArgs = append(valueArgs, field)
		}
	}
	if len(valueStrings) == 0 {
		log.Println("insert: no rows to insert")
		return
	}

	db, err := sql.Open("mysql",
		"root:root@tcp(127.0.0.1:3306)/test")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Only the placeholder tuples are spliced into the statement text; every
	// value is passed as a bound argument.
	sqlName := fmt.Sprintf("INSERT INTO `top250` (id,title,image,subtitle,other,personnel,info,score) VALUES %s",
		strings.Join(valueStrings, ","))
	fmt.Println(sqlName)

	stmt, err := db.Prepare(sqlName)
	if err != nil {
		log.Fatal(err)
	}
	// Release the prepared statement once the insert has run.
	defer stmt.Close()

	fmt.Println(valueArgs)
	res, err := stmt.Exec(valueArgs...)
	if err != nil {
		log.Fatal(err)
	}
	lastId, err := res.LastInsertId()
	if err != nil {
		log.Fatal(err)
	}
	rowCnt, err := res.RowsAffected()
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("ID = %d, affected = %d\n", lastId, rowCnt)
}

// HttpGet issues a GET request to url and returns the response body as a
// string.
//
// An error is returned when the request fails, when the status code is not
// 200 OK, or when reading the body fails part-way (for example on a truncated
// response). On error the returned string is empty. On success a completion
// message and the body are also printed to standard output.
func HttpGet(url string) (result string, err error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// A non-200 status is treated as a failure.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("get content failed status code is %d ", resp.StatusCode)
	}

	// Stream the body into a builder; io.Copy stops at EOF and reports any
	// other read error, including one that arrives with zero bytes read.
	var body strings.Builder
	if _, err = io.Copy(&body, resp.Body); err != nil {
		return "", err
	}
	fmt.Println("读取网页完成")

	result = body.String()
	fmt.Println(result)
	return result, nil
}

// Request1 sends an HTTP request without a body and returns the response body
// as a string.
//
// url and method select the target and verb. Every key/value pair in headers
// is added to the request headers. timeout is multiplied by time.Second to
// form the client timeout, so callers are expected to pass a plain count of
// seconds (for example 5), not an already scaled duration.
//
// An error is returned when the request cannot be built or sent, when the
// status code is not 200 OK, or when the body cannot be read; the returned
// string is empty in those cases. On success the body and the status code are
// also printed to standard output.
func Request1(url, method string, headers Params, timeout time.Duration) (result string, err error) {
	client := &http.Client{
		Timeout: timeout * time.Second,
	}

	request, err := http.NewRequest(method, url, nil)
	if err != nil {
		return "", err
	}
	for key, val := range headers {
		request.Header.Add(key, val)
	}

	response, err := client.Do(request)
	if err != nil {
		// The response must not be touched when Do reports an error.
		return "", err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return "", fmt.Errorf("get content failed status code is %d ", response.StatusCode)
	}

	res, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	result = string(res)
	fmt.Println(result)
	fmt.Println(response.StatusCode)

	return result, nil
}

// main scrapes a fixed range of Top 250 pages concurrently, one goroutine per
// page, and prints a line for each page as its goroutine reports back.
func main() {
	// The list has 25 entries per page, 10 pages in total; only the pages in
	// this range are crawled.
	const (
		firstPage = 1
		lastPage  = 3
	)

	done := make(chan int)
	for page := firstPage; page <= lastPage; page++ {
		go SpiderDouBan(page, done)
	}

	// Wait for exactly one completion message per started goroutine.
	for remaining := lastPage - firstPage + 1; remaining > 0; remaining-- {
		finished := <-done
		fmt.Println("第" + strconv.Itoa(finished) + "页任务完成")
	}
}
